# Read the combined ACS extract; GEOID is stored as text.
acs_data <- fread(here("/data/acs/combined_acs.csv"))
acs_data$GEOID <- as.character(acs_data$GEOID)

# Keep only the geographies compared in this report: the school district,
# Wasco County and its neighbouring counties, plus the state as a whole.
acs_counties <- acs_data %>%
  filter(
    NAME %in% c(
      "South Wasco County School District 1, Oregon",
      "Wasco County, Oregon",
      "Hood River County, Oregon",
      "Sherman County, Oregon",
      "Jefferson County, Oregon",
      "Skamania County, Washington",
      "Klickitat County, Washington",
      "Oregon"
    )
  )
# Tract-level geography for the four Oregon counties of interest.
# cb = TRUE is passed through to tigris (cartographic boundary files per the
# tigris documentation).
or_tracts <- tracts(
  state = "OR",
  county = c("Wasco", "Hood River", "Sherman", "Jefferson"),
  cb = TRUE
)
##
|
| | 0%
|
|= | 2%
|
|== | 3%
|
|=== | 5%
|
|==== | 6%
|
|===== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 12%
|
|========= | 12%
|
|========== | 14%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 22%
|
|================ | 22%
|
|================= | 24%
|
|================= | 25%
|
|================== | 26%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================== | 43%
|
|=============================== | 45%
|
|================================= | 46%
|
|================================= | 47%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|===================================== | 53%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 64%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|==================================================== | 74%
|
|===================================================== | 76%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================== | 82%
|
|=========================================================== | 84%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
# Tract-level geography for the two Washington counties of interest,
# requested with the same cb = TRUE setting as the Oregon tracts.
wa_tracts <- tracts(
  state = "WA",
  county = c("Skamania", "Klickitat"),
  cb = TRUE
)
##
|
| | 0%
|
|= | 1%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|===== | 6%
|
|===== | 8%
|
|====== | 8%
|
|======= | 10%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 21%
|
|================ | 22%
|
|================ | 24%
|
|================= | 24%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 29%
|
|===================== | 29%
|
|====================== | 31%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================= | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|=================================== | 50%
|
|==================================== | 52%
|
|===================================== | 52%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================= | 69%
|
|================================================= | 71%
|
|================================================== | 71%
|
|=================================================== | 73%
|
|==================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================== | 94%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
# Stack the Oregon and Washington tract geometries into a single layer.
tract_geo <- rbind(or_tracts, wa_tracts)

# Keep the ACS rows whose NAME mentions "Tract" and attach them to the
# tract geometries, matching on GEOID.
acs_tracts <- geo_join(
  tract_geo,
  filter(acs_data, grepl("Tract", NAME)),
  by = "GEOID"
)
######## USE THE FOLLOWING ##########
# Ten-colour project palette, adapted from:
# https://coolors.co/232d4b-2c4f6b-0e879c-60999a-d1e0bf-d9e12b-e6ce3a-e6a01d-e57200-fdfdfd
# Note: the last entry here (#ADB5BD) differs from the last colour in the URL.
dspgpal <- c(
  "#232D4B",
  "#2C4F6B",
  "#0E879C",
  "#60999A",
  "#D1E0BF",
  "#D9E12B",
  "#E6CE3A",
  "#E6A01D",
  "#E57200",
  "#ADB5BD"
)
barchart for 2018
# Interactive bar chart of 2018 median household income per region.
# Error bars span estimate +/- margin of error; the hover text comes from the
# `text` aesthetic, which is the only thing shown in the plotly tooltip.
(
  acs_counties %>%
    filter(year == 2018) %>%
    ggplot(aes(
      x = NAME,
      y = median_household_income,
      text = paste0(
        "Region: ", NAME,
        "<br>Year: ", year,
        "<br>Median Household Income: $", median_household_income,
        "<br>Margin of Error: $", median_household_income_moe
      )
    )) +
    geom_col(fill = "dark blue") +
    geom_errorbar(
      aes(
        x = NAME,
        ymin = median_household_income - median_household_income_moe,
        ymax = median_household_income + median_household_income_moe
      ),
      color = "dark orange"
    ) +
    geom_point(color = "dark orange", size = 3) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45)) +
    labs(
      title = "Median Household Income",
      x = "Region",
      y = "Median Household Income"
    )
) %>%
  ggplotly(tooltip = "text")
## Warning: 'bar' objects don't have these attributes: 'mode'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'texttemplate', 'hovertext', 'hovertemplate', 'textposition', 'insidetextanchor', 'textangle', 'textfont', 'insidetextfont', 'outsidetextfont', 'constraintext', 'cliponaxis', 'orientation', 'base', 'offset', 'width', 'marker', 'offsetgroup', 'alignmentgroup', 'selected', 'unselected', 'r', 't', '_deprecated', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'basesrc', 'offsetsrc', 'widthsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
Try a line chart with all years present
# Line chart of median household income across all available years, with one
# line (and one palette colour) per geography.
p <- acs_counties %>%
  ggplot(aes(
    x = year,
    y = median_household_income,
    group = NAME,
    color = NAME,
    text = paste0(
      "Region: ", NAME,
      "<br>Year: ", year,
      "<br>Median Household Income: $", median_household_income,
      "<br>Margin of Error: $", median_household_income_moe
    )
  )) +
  geom_line() +
  geom_point() +
  scale_colour_manual(values = dspgpal) +
  # Margin-of-error ranges are left out of the plot for now:
  # geom_pointrange(aes(ymin=median_household_income - median_household_income_moe, ymax=median_household_income + median_household_income_moe)) +
  theme_minimal() +
  labs(
    title = "Median Household Income 2015-2018",
    x = "Year",
    y = "Median Household Income"
  )

# Note: Wasco and South Wasco are from ACS 5-year estimates (moving averages).
# Render interactively with a reduced, always-visible mode bar.
p %>%
  ggplotly(tooltip = "text") %>%
  config(
    displayModeBar = "static",
    displaylogo = FALSE,
    modeBarButtonsToRemove = list(
      "zoom2d", "select2d", "lasso2d",
      "hoverClosestCartesian", "hoverCompareCartesian", "resetScale2d"
    )
  )
Trends are easier to see, but the many colors make the plot quite busy. The only drawback of the line graphs compared with the bar charts is that the margin of error cannot be seen visually. However, the tooltip provided by plotly is great for keeping that data available.
Same line chart but colors are only kept for south wasco, wasco county, and the state geography.
# Line chart of median household income over time. South Wasco, Wasco County
# and Oregon keep their own colours; every other region is collapsed into one
# "Neighboring Counties" colour group (lines are still drawn per NAME).
p <- ggplot(
  acs_counties %>%
    mutate(
      south_wasco = fct_other(
        NAME,
        keep = c(
          "South Wasco County School District 1, Oregon",
          "Wasco County, Oregon",
          "Oregon"
        ),
        other_level = "Neighboring Counties"
      )
    ),
  aes(
    x = year,
    y = median_household_income,
    group = NAME,
    color = south_wasco,
    text = paste0(
      "Region: ", NAME,
      "<br>Year: ", year,
      "<br>Median Household Income: $", median_household_income,
      "<br>Margin of Error: $", median_household_income_moe
    )
  )
) +
  geom_line(size = 1.5) +
  geom_point(size = 2) +
  # Colours are assigned to the levels of `south_wasco` in level order; the
  # last colour (grey) is presumably the "Neighboring Counties" level --
  # verify against levels(south_wasco) if NAME is ever stored as a factor.
  scale_colour_manual(
    name = "Region",
    values = c(dspgpal[1], dspgpal[9], dspgpal[2], dspgpal[10])
  ) +
  # A scale_alpha_manual() call used to sit here; it was removed because no
  # layer maps an `alpha` aesthetic, so it never changed the plot.
  # Margin-of-error ranges are left out of the plot for now:
  # geom_pointrange(aes(ymin=median_household_income - median_household_income_moe, ymax=median_household_income + median_household_income_moe)) +
  theme_minimal() +
  ggtitle("Median Household Income 2015-2018") +
  ylab("Median Household Income") +
  xlab("Year")

# Note: Wasco and South Wasco are from ACS 5-year estimates (moving averages).
ggplotly(p, tooltip = "text") %>%
  config(
    displayModeBar = "static",
    displaylogo = FALSE,
    modeBarButtonsToRemove = list(
      "zoom2d", "select2d", "lasso2d",
      "hoverClosestCartesian", "hoverCompareCartesian", "resetScale2d"
    )
  )
Much cleaner looking plot! It's obvious where South Wasco ranks in relation to the rest of the counties and geographies. We see that South Wasco has the lowest median household income, but it is very similar to that of Sherman County. Generally, the state of Oregon is seeing a steady increase in median household income; South Wasco's income, however, has only started increasing since 2016.
Rather than just looking at median income, we can see where the rest of the community is distributed in household income.
# Grouped bar chart of the 2018 household income distribution.
# Build a long table: one row per region and income-bracket column.
income <- acs_counties %>%
  filter(year == 2018) %>%
  select(NAME, contains("income")) %>%
  # Drop the margin-of-error columns and the median, leaving only the
  # bracket percentage columns.
  select(!contains("moe"), -median_household_income)
income <- melt(income, id.vars = "NAME", measure.vars = colnames(income)[-1])

ggplotly(
  ggplot(income) +
    geom_col(aes(x = NAME, y = value, fill = variable), position = "dodge") +
    # The bars are coloured through `fill`, so the project palette has to be
    # supplied on the fill scale. It was previously given to
    # scale_colour_manual(), which had no effect because no colour aesthetic
    # is mapped. Labels are matched to the bracket columns in column order.
    scale_fill_manual(
      name = "Income Bracket",
      values = dspgpal,
      labels = c(
        "Less than 10,000", "10,000-14,999", "15,000-24,999",
        "25,000-34,999", "35,000-49,999", "50,000-74,999",
        "75,000-99,999", "100,000-149,999", "150,000-199,999", "above 200,000"
      )
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 30)) +
    ylab("% of Population") +
    xlab("Region") +
    ggtitle("Income Distribution for 2018")
)
The plot is quite dense, but we can see how the distributions of income compare across the different counties and geographies. South Wasco has close to 50% of its households earning between 35,000 and 74,999 dollars. However, it also has one of the highest percentages of households in the lowest income bracket, along with Skamania, Klickitat, and Jefferson counties.
Trying to visualize with a stacked bar chart
# Stacked bar chart of the 2018 income distribution, built from the long
# `income` table; the stacking order is reversed via position_stack().
(
  ggplot(income, aes(x = NAME, y = value, fill = variable)) +
    geom_bar(stat = "identity", position = position_stack(reverse = TRUE)) +
    scale_fill_discrete(
      name = "Income Bracket",
      labels = c(
        "Less than 10,000", "10,000-14,999", "15,000-24,999",
        "25,000-34,999", "35,000-49,999", "50,000-74,999",
        "75,000-99,999", "100,000-149,999", "150,000-199,999", "above 200,000"
      )
    ) +
    labs(x = "Region", y = "% of Population") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 30)) +
    ggtitle("Income Distribution for 2018")
) %>%
  ggplotly() %>%
  config(
    displayModeBar = "static",
    displaylogo = FALSE,
    modeBarButtonsToRemove = list(
      "zoom2d", "select2d", "lasso2d",
      "hoverClosestCartesian", "hoverCompareCartesian", "resetScale2d"
    )
  )
This stacked bar chart condenses the busy plot of distributions from the previous grouped bar. It is easier to see where the heavier densities are at the income extremes.
Bar chart of federal poverty rates in 2018
# Interactive bar chart of the 2018 share of population below the federal
# poverty line, with error bars spanning estimate +/- margin of error.
(
  acs_counties %>%
    filter(year == 2018) %>%
    ggplot(aes(
      x = NAME,
      y = below_poverty,
      text = paste0(
        "Region: ", NAME,
        "<br>Year: ", year,
        "<br>Percent Below Federal Poverty: ", below_poverty, "%",
        "<br>Margin of Error: ", below_poverty_moe, "%"
      )
    )) +
    geom_col(fill = "dark blue") +
    geom_errorbar(
      aes(
        x = NAME,
        ymin = below_poverty - below_poverty_moe,
        ymax = below_poverty + below_poverty_moe
      ),
      color = "dark orange"
    ) +
    geom_point(color = "dark orange", size = 3) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 30)) +
    labs(
      x = "Region",
      y = "% Below Poverty",
      title = "% of Population Below Federal Poverty Line"
    )
) %>%
  ggplotly(tooltip = "text") %>%
  config(
    displayModeBar = "static",
    displaylogo = FALSE,
    modeBarButtonsToRemove = list(
      "zoom2d", "select2d", "lasso2d",
      "hoverClosestCartesian", "hoverCompareCartesian", "resetScale2d"
    )
  )
## Warning: 'bar' objects don't have these attributes: 'mode'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'texttemplate', 'hovertext', 'hovertemplate', 'textposition', 'insidetextanchor', 'textangle', 'textfont', 'insidetextfont', 'outsidetextfont', 'constraintext', 'cliponaxis', 'orientation', 'base', 'offset', 'width', 'marker', 'offsetgroup', 'alignmentgroup', 'selected', 'unselected', 'r', 't', '_deprecated', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'basesrc', 'offsetsrc', 'widthsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
# Static tract-level map of the 2018 share of population below poverty,
# with county outlines drawn on top.
ggplot() +
  geom_sf(
    data = filter(acs_tracts, year == 2018),
    aes(fill = below_poverty)
  ) +
  geom_sf(
    fill = "transparent", color = "gray20", size = 1,
    # Dissolve tracts into county outlines. The map spans Oregon and
    # Washington and county FIPS codes are only unique within a state, so
    # group on the state + county prefix of the tract GEOID (first five
    # characters) rather than on COUNTYFP alone.
    data = acs_tracts %>%
      group_by(county_geoid = substr(GEOID, 1, 5)) %>%
      summarise()
  ) +
  theme_minimal() +
  # Title previously read "census track"; the geographic unit is a tract.
  labs(title = paste("Percent of population below poverty by census tract in", 2018, sep = " "))
## `summarise()` ungrouping output (override with `.groups` argument)
static bar chart
# Static bar chart of the 2018 employment_20_to_64 estimate per region,
# with error bars spanning estimate +/- margin of error.
acs_counties %>%
  filter(year == 2018) %>%
  ggplot(aes(x = NAME, y = employment_20_to_64)) +
  geom_col(fill = "dark blue") +
  geom_errorbar(
    aes(
      x = NAME,
      ymin = employment_20_to_64 - employment_20_to_64_moe,
      ymax = employment_20_to_64 + employment_20_to_64_moe
    ),
    color = "dark orange"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 30)) +
  geom_point(color = "dark orange", size = 3) +
  labs(title = "% of Adults (20-64) with Employment Status")
static map
# Static tract-level map of the 2018 employment_20_to_64 estimate.
ggplot() +
  geom_sf(
    data = filter(acs_tracts, year == 2018),
    aes(fill = employment_20_to_64)
  ) +
  # Title fixed: it previously repeated "adults" and said "census track".
  labs(title = "Percent of employed adults 20 to 64 by census tract")
# Line chart of the affordable-housing percentage over time. South Wasco,
# Wasco County and Oregon keep their own colours; all other regions share the
# "Neighboring Counties" colour group (lines are still drawn per NAME).
p <- acs_counties %>%
  mutate(
    south_wasco = fct_other(
      NAME,
      keep = c(
        "South Wasco County School District 1, Oregon",
        "Wasco County, Oregon",
        "Oregon"
      ),
      other_level = "Neighboring Counties"
    )
  ) %>%
  ggplot(aes(
    x = year,
    y = affordable_housing_all_perc,
    group = NAME,
    color = south_wasco,
    text = paste0(
      "Region: ", NAME,
      "<br>Year: ", year,
      "<br>Affordable Housing: ", round(affordable_housing_all_perc, digits = 1), "%"
    )
  )) +
  geom_line(size = 1.5) +
  geom_point(size = 2) +
  scale_colour_manual(
    name = "Region",
    values = c(dspgpal[1], dspgpal[9], dspgpal[2], dspgpal[10])
  ) +
  # Left over from the income chart, kept disabled:
  # geom_pointrange(aes(ymin=median_household_income - median_household_income_moe, ymax=median_household_income + median_household_income_moe)) +
  theme_minimal() +
  labs(
    title = "Affordable Housing 2015-2018",
    x = "Year",
    y = "Affordable Housing"
  )

# Note: Wasco and South Wasco are from ACS 5-year estimates (moving averages).
p %>%
  ggplotly(tooltip = "text") %>%
  config(
    displayModeBar = "static",
    displaylogo = FALSE,
    modeBarButtonsToRemove = list(
      "zoom2d", "select2d", "lasso2d",
      "hoverClosestCartesian", "hoverCompareCartesian", "resetScale2d"
    )
  )
# 2018 affordable-housing columns for the comparison regions.
housing <- select(filter(acs_counties, year == 2018), NAME, contains("affordable_housing"))

# Long table of the percentage columns. Columns 1 (NAME) and 4 are excluded
# from the measure variables; the "Own"/"Rent" legend labels below imply that
# column 4 is the combined ("all") percentage -- TODO confirm column order.
housing_rent_own <- housing %>% select(NAME, contains("perc"))
housing_rent_own <- melt(
  housing_rent_own,
  id.vars = "NAME",
  measure.vars = colnames(housing_rent_own)[-c(1, 4)]
)

# Grouped bar chart for own and rent occupancy.
# The hover text must be mapped inside aes(): it was previously passed to
# ggplot() as a stray argument and was never evaluated, so tooltip = "text"
# had nothing to show. It also referred to `year` and
# `affordable_housing_all_perc`, which do not exist in the melted table; the
# data is filtered to 2018 above and the plotted quantity is `value`.
ggplotly(
  ggplot(
    housing_rent_own,
    aes(
      x = NAME, y = value, fill = variable,
      text = paste0(
        "Region: ", NAME,
        "<br>Year: ", 2018,
        "<br>Affordable Housing: ", round(value, digits = 1), "%"
      )
    )
  ) +
    geom_col(position = "dodge") +
    scale_fill_discrete(name = "Housing Ownership", labels = c("Own", "Rent")) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 30)) +
    ylab("% of Occupied housing units") +
    xlab("Region") +
    ggtitle("Affordable Housing 2015-2018", subtitle = "Occupied households where monthly costs are less than 30% of houshold income"),
  tooltip = "text"
)
# Divergent bar chart splitting own and rent occupancy: owner percentages
# stay positive, every other series is negated so it extends the opposite way.
housing_diverge <- housing_rent_own %>%
  mutate(
    value = as.numeric(ifelse(variable == "affordable_housing_own_perc", value, -1 * value))
  )

ggplotly(
  ggplot(
    housing_diverge,
    aes(
      x = NAME, y = value, fill = variable,
      text = paste0(
        "Region: ", NAME,
        "<br>Year: ", 2018,
        "<br>Affordable Housing: ", round(abs(value), digits = 1), "%"
      )
    )
  ) +
    geom_bar(stat = "identity") +
    # Show absolute values on the axis so both directions read as positive
    # percentages.
    scale_y_continuous(
      breaks = pretty(housing_diverge$value),
      labels = abs(pretty(housing_diverge$value))
    ) +
    # The bars are coloured through `fill`, so the two palette colours go on
    # the fill scale. They were previously given to scale_colour_manual(),
    # which had no effect because no colour aesthetic is mapped.
    scale_fill_manual(
      name = "Housing Ownership",
      values = c(dspgpal[1], dspgpal[9]),
      labels = c("Own", "Rent")
    ) +
    theme_minimal() +
    labs(x = "Region", y = "% of Occupied Housing Units") +
    coord_flip(),
  tooltip = "text"
) %>%
  # Title and subtitle are set on the plotly layout rather than in ggplot.
  layout(title = list(text = paste0(
    "Affordable Housing 2015-2018",
    '<br>', '<sup>',
    "% of occupied households where monthly costs are less than 30% of houshold income",
    '</sup>'
  )))
% of affordable housing by household income bracket
# 2018 affordable-housing columns for the comparison regions.
housing <- select(filter(acs_counties, year == 2018), NAME, contains("affordable_housing"))

# Long table of the columns that are neither percentages ("perc") nor totals.
# NOTE(review): the measure variables skip columns 1 (NAME) and 4, the same
# indices used for the own/rent table -- confirm that dropping column 4 is
# intended for this column set.
housing_by_income <- housing %>% select(NAME, !contains("perc") & !contains("total"))
housing_by_income <- melt(
  housing_by_income,
  id.vars = "NAME",
  measure.vars = colnames(housing_by_income)[-c(1, 4)]
)

# Grouped bar chart of affordable housing by household income bracket.
# The hover text must be mapped inside aes(): it was previously passed to
# ggplot() as a stray argument and was never evaluated, so tooltip = "text"
# had nothing to show. It also referred to `year`, which does not exist in
# the melted table; the data is filtered to 2018 above.
ggplotly(
  ggplot(
    housing_by_income,
    aes(
      x = NAME, y = value, fill = variable,
      text = paste0(
        "Region: ", NAME,
        "<br>Year: ", 2018,
        "<br>Affordable Housing: ", round(value, digits = 1), "%"
      )
    )
  ) +
    geom_col(position = "dodge") +
    # scale_fill_discrete(name = "Housing Ownership", labels = c("Own", "Rent")) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 30)) +
    ylab("% of Occupied housing units") +
    xlab("Region") +
    ggtitle("Affordable Housing 2015-2018", subtitle = "Occupied households where monthly costs are less than 30% of houshold income"),
  tooltip = "text"
)
Social
Racial Diversity
Family Stability